import pandas as pd

# Load the first results file. Data source: "Adaptive Fuzzy String Matching:
# How to Merge Datasets with Only One (Messy) Identifying Field".
df = pd.read_csv("Survey_Final_Results.csv")

# precision at 100% recall
def precision_at_100_percent_recall(df, col):
    """Print and return the precision of ``col`` at the 100%-recall threshold.

    The threshold is the lowest ``col`` score found among rows whose
    ``label`` equals 1. Every row scoring at or above that threshold is
    treated as a predicted match, and precision is the share of those rows
    that are labelled 1.

    Args:
        df: DataFrame with ``amicus`` and ``label`` columns plus the score
            column ``col``. It is modified in place: the label of the
            "F. James Sensenbrenner, Jr. (R)" row is set to 1.
        col: Name of the score column to evaluate.

    Returns:
        The precision as a float (the same value that is printed).

    Raises:
        ValueError: If no row labelled 1 has a non-missing score in ``col``.
    """
    # correct the label for F. James Sensenbrenner, Jr. (R)/ Jim Sensenbrenner (R)
    df.loc[df["amicus"] == "F. James Sensenbrenner, Jr. (R)", "label"] = 1
    print("Col", col)

    # Missing scores cannot be ranked, so they are ignored when locating the
    # threshold. The builtin min() over a list is order-dependent when NaN is
    # present and can return NaN, which would select no rows at all.
    positive_scores = df.loc[df["label"] == 1, col].dropna()
    if positive_scores.empty:
        raise ValueError(
            f"no rows with label == 1 and a valid score in column {col!r}"
        )
    min_col_score = positive_scores.min()

    # Keep every row that would be predicted positive at this threshold; the
    # row that supplied the minimum is always included, so this is non-empty.
    threshold_df = df[df[col] >= min_col_score]
    precision = sum(threshold_df["label"]) / len(threshold_df)
    print(precision)
    return float(precision)

# Evaluate each string-similarity score column of the first results file.
for metric in (
    "AFSM_score",
    "levenshtein",
    "lcsstr",
    "cosine",
    "overlap",
    "jaccard",
):
    precision_at_100_percent_recall(df, metric)

# Evaluate the chatGPT_score column of each experiment-2 results file, in the
# same order as before; df ends up bound to the last file loaded.
for results_csv in (
    "experiment_2_v1_temp1.0.csv",
    "experiment_2_v1_temp0.2.csv",
    "experiment_2_v2_temp1.0.csv",
    "experiment_2_v2_temp0.2.csv",
):
    df = pd.read_csv(results_csv)
    precision_at_100_percent_recall(df, "chatGPT_score")


